// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.mapper;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.unicode.cldr.api.AttributeKey.keyOf;
import static org.unicode.cldr.api.CldrData.PathOrder.DTD;

import java.util.Optional;

import org.unicode.cldr.api.AttributeKey;
import org.unicode.cldr.api.CldrData;
import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.IcuData;
import org.unicode.icu.tool.cldrtoicu.RbPath;
import org.unicode.icu.tool.cldrtoicu.CldrDataProcessor;

import com.google.common.escape.UnicodeEscaper;

/**
 * A mapper to collect RBNF rule data from {@link CldrDataType#LDML LDML} data via the paths:
 * <pre>{@code
 *   //ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]
 * }</pre>
 *
 * <p>Each ruleset contributes a header line ({@code %name:} or {@code %%name:}) followed by one
 * line per {@code rbnfrule} value, all added under {@code RBNFRules/<grouping type>}.
 */
public final class RbnfMapper {
    // Path pattern matching a single ruleset; one Ruleset instance is created per match.
    private static final String RULESET_PATH =
        "//ldml/rbnf/rulesetGrouping[@type=*]/ruleset[@type=*]";

    private static final AttributeKey GROUPING_TYPE = keyOf("rulesetGrouping", "type");
    private static final AttributeKey RULESET_TYPE = keyOf("ruleset", "type");
    private static final AttributeKey RULESET_ACCESS = keyOf("ruleset", "access");
    private static final AttributeKey RBNF_VALUE = keyOf("rbnfrule", "value");
    private static final AttributeKey RBNF_RADIX = keyOf("rbnfrule", "radix");

    // This is the ICU path prefix, below which everything generated by this visitor will go.
    private static final RbPath RB_ROOT = RbPath.of("RBNFRules");

    /*
     * Escaper applied to every rule value before it is added to the ICU data:
     *   - a backslash becomes a double backslash,
     *   - the left/right arrow characters become '<' and '>' respectively,
     *   - code points in the range U+0020 to U+007F are left untouched,
     *   - everything else becomes a "\\uXXXX" (up to U+FFFF) or "\\UXXXXXXXX" escape.
     * The non-ASCII case goes through "String.format()", which is slow, but there are fewer
     * than 100 values that need any escaping, so it's fine.
     */
    private static final UnicodeEscaper ESCAPE_RBNF_DATA = new UnicodeEscaper() {
        private final char[] escapedBackslash = "\\\\".toCharArray();
        private final char[] leftAngle = "<".toCharArray();
        private final char[] rightAngle = ">".toCharArray();

        @Override
        protected char[] escape(int cp) {
            if (cp == '\\') {
                return escapedBackslash;
            }
            if (cp == '←') {
                return leftAngle;
            }
            if (cp == '→') {
                return rightAngle;
            }
            boolean inUnescapedRange = cp >= 0x0020 && cp <= 0x007F;
            if (inUnescapedRange) {
                // Returning null means "do not escape".
                return null;
            }
            // 4 hex digits are enough up to U+FFFF, anything above needs the 8 digit form.
            String escapeFormat = (cp <= 0xFFFF) ? "\\u%04X" : "\\U%08X";
            return String.format(escapeFormat, cp).toCharArray();
        }
    };

    private static final CldrDataProcessor<RbnfMapper> RBNF_PROCESSOR = newProcessor();

    // Builds the processor which creates a Ruleset for each matched ruleset path and feeds
    // every "rbnfrule" value below that path to it.
    private static CldrDataProcessor<RbnfMapper> newProcessor() {
        CldrDataProcessor.Builder<RbnfMapper> builder = CldrDataProcessor.builder();
        builder
            .addAction(RULESET_PATH, (mapper, path) -> mapper.new Ruleset(path))
            .addValueAction("rbnfrule", Ruleset::addRule);
        return builder.build();
    }

    /**
     * Processes the given CLDR data and adds the resulting RBNF rules to the given ICU data.
     * The ICU special data (if present) is processed before the CLDR data.
     *
     * @param icuData the ICU data to be filled (must not be null).
     * @param cldrData the unresolved CLDR data to process.
     * @param icuSpecialData additional ICU data (in the "icu:" namespace)
     * @return the {@code icuData} instance that was passed in, after the RBNF data was added.
     */
    public static IcuData process(
        IcuData icuData, CldrData cldrData, Optional<CldrData> icuSpecialData) {

        RbnfMapper mapper = new RbnfMapper(icuData);
        // DTD order is essential for both passes: the RBNF paths contain ordered elements, so
        // visiting them in sorted order saves us from re-sorting the results afterwards.
        if (icuSpecialData.isPresent()) {
            RBNF_PROCESSOR.process(icuSpecialData.get(), mapper, DTD);
        }
        RBNF_PROCESSOR.process(cldrData, mapper, DTD);
        return mapper.icuData;
    }

    private final IcuData icuData;

    private RbnfMapper(IcuData icuData) {
        this.icuData = checkNotNull(icuData);
    }

    /**
     * Per-ruleset state: writes the ruleset header once and then one line per rule into the
     * enclosing mapper's ICU data.
     */
    private class Ruleset {
        private final RbPath groupPath;
        private final String name;
        // Rules of the "lenient-parse" ruleset are emitted without a "value:" prefix.
        private final boolean lenient;
        private boolean headerWritten = false;

        Ruleset(CldrPath prefix) {
            this.groupPath = RB_ROOT.extendBy(GROUPING_TYPE.valueFrom(prefix));
            this.name = RULESET_TYPE.valueFrom(prefix);
            this.lenient = "lenient-parse".equals(name);
        }

        void addRule(CldrValue value) {
            // The access attribute belongs to the parent (ruleset) path element, but it is a
            // value attribute and so is not in the prefix path given to the constructor. It
            // only becomes visible when a child value is visited, so the header is emitted
            // lazily, exactly once, on the first rule.
            if (!headerWritten) {
                String access = RULESET_ACCESS.valueFrom(value, "public");
                String marker = access.equals("private") ? "%%" : "%";
                icuData.add(groupPath, marker + name + ":");
                headerWritten = true;
            }

            // Prefix is: "@value: ", "@value/@radix: " or empty (for non strict rules).
            String rulePrefix = "";
            if (!lenient) {
                String baseValue = RBNF_VALUE.valueFrom(value);
                String radixSuffix =
                    RBNF_RADIX.optionalValueFrom(value).map(r -> "/" + r).orElse("");
                rulePrefix = baseValue + radixSuffix + ": ";
            }

            String escapedRule = ESCAPE_RBNF_DATA.escape(value.getValue());
            icuData.add(groupPath, rulePrefix + escapedRule);
        }
    }
}
